import datetime
import time

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.remote.webelement import WebElement

def _save_sina_pages(page_count):
    """Open the Sina stock list in Chrome and save each page's HTML to disk.

    Files are written to the current directory as
    ``sina_page_<YYYYmmddHHMMSS>_<index>.html``, where the timestamp is taken
    once before the first page is saved.

    Args:
        page_count: Number of consecutive list pages to save.
    """
    # The 50th row's link is used as the "table is fully rendered" signal.
    last_row_selector = ".tbl_wrap tbody tr:nth-child(50) th a"
    next_page_selector = "#list_pages_top2 a:last-child"

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(2)
        driver.set_page_load_timeout(3)
        driver.set_script_timeout(3)
        wait = WebDriverWait(driver, 10, 0.5)
        # url = "https://vip.stock.finance.sina.com.cn/mkt/#sh_a"  # Shanghai market
        url = "https://vip.stock.finance.sina.com.cn/mkt/#sz_a"  # Shenzhen market
        try:
            driver.get(url)
        except Exception as e:
            # Deliberately best-effort: the page load timeout is only 3 seconds,
            # so carry on and rely on the explicit waits below.
            print('time out after 3 seconds when loading page!!!', e)

        # NOTE(review): presumably the third link switches the list to a
        # larger page size; the waits below expect a 50th row - confirm.
        list_amount_ctrl = driver.find_element(By.CSS_SELECTOR, "#list_amount_ctrl a:nth-child(3)")
        print("list_amount_ctrl:", list_amount_ctrl)
        list_amount_ctrl.click()

        print("start wait...")
        wait.until(lambda d: d.find_element(By.CSS_SELECTOR, last_row_selector))
        print("finish wait")

        dt = datetime.datetime.now()
        wait.until(lambda d: d.find_element(By.CSS_SELECTOR, next_page_selector))

        for i in range(page_count):
            fn = "sina_page_%s_%d.html" % (dt.strftime("%Y%m%d%H%M%S"), i)
            # NOTE(review): uses the platform default text encoding, the same
            # as the reader in _print_saved_stocks; pass an explicit encoding
            # to both if the files are moved between machines.
            with open(fn, "w") as f:
                f.write(driver.page_source)

            if i == page_count - 1:
                # Nothing left to save, so do not navigate past the last page.
                break

            driver.find_element(By.CSS_SELECTOR, next_page_selector).click()
            # NOTE(review): presumably gives the table a moment to start
            # refreshing so the wait does not match the previous page's rows.
            time.sleep(0.2)
            wait.until(lambda d: d.find_element(By.CSS_SELECTOR, last_row_selector))
    finally:
        # Always shut the browser down, even when a wait or lookup fails.
        driver.quit()


def _print_saved_stocks(page_count):
    """Parse previously saved list pages and print each stock's code and name.

    Reads ``sina_page_20240412124843_<index>.html`` for every index below
    ``page_count`` from the current directory. Rows whose code does not start
    with ``sh`` or ``sz`` are ignored; malformed rows are printed and skipped.

    Args:
        page_count: Number of saved pages to read.
    """
    for i in range(page_count):
        with open('sina_page_20240412124843_%d.html' % i) as f:
            soup = BeautifulSoup(f.read(), 'html.parser')

        for row in soup.select(".tbl_wrap tbody tr"):
            th = row.select("th")
            if len(th) < 2:
                # Row lacks the code/name header cells; show it and move on
                # instead of failing with IndexError.
                print(row)
                continue

            stock_el = th[0].a
            if stock_el is None:
                print(th[0])
                continue

            stock_code = stock_el.text
            if stock_code[:2] not in ('sh', 'sz'):
                continue

            # The name is read from an anchor nested inside another anchor.
            outer_name_el = th[1].a
            name_el = outer_name_el.a if outer_name_el is not None else None
            if name_el is None:
                print(th[1])
                continue

            print("stock_code:", stock_code, name_el.text)


def test_selinum():
    """Run one stage of the Sina stock list experiment.

    The two stages are switched by hand with the literal conditions below:
    the first saves the list pages with Selenium, the second parses pages
    that were saved earlier and prints the stock codes and names.
    """
    page_count = 36

    if False:
        _save_sina_pages(page_count)
    elif True:
        _print_saved_stocks(page_count)


def start():
    """Print a start banner, then run ``test_selinum``."""
    print("get data start...")
    test_selinum()


